import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from douban.items import DoubanItem

class DoubanpaSpider(CrawlSpider):
    """Crawl spider for the Douban movie Top 250 list.

    Starts from the Top 250 URL, follows every extracted link whose URL
    matches the ``start=<digits>`` pagination pattern, and yields one
    ``DoubanItem`` per ``<li>`` entry on each page reached through that rule.
    """

    name = 'doubanpa'
    # NOTE(review): allowed_domains is not set on this spider, so followed
    # links are presumably not restricted to a domain -- confirm this is intended.
    start_urls = ['https://movie.douban.com/top250']

    # Extract the pagination links (URLs containing "start=<digits>").
    link = LinkExtractor(allow=r'.*?start=\d+')
    rules = (
        Rule(link, callback='parse_item', follow=True),
    )

    # (item field, XPath relative to one <li> entry), listed in the order the
    # fields are written onto the item.
    _FIELD_XPATHS = (
        ('film_id', './/em/text()'),  # rank number
        ('film_CNname', './/a/span[1]/text()'),  # Chinese title
        ('film_OTname', './/a/span[2]/text()'),  # alternate title
        ('film_introduce', './/div[@class="bd"]/p//text()[1]'),  # director / cast
        ('film_comments', './/div[@class="star"]/span[4]/text()'),  # rating count
        ('film_style', './/div[@class="bd"]/p//text()[2]'),  # year / genre
        ('film_prime', './/p[@class="quote"]/span[1]/text()'),  # short intro quote
    )

    def parse_item(self, response):
        """Yield a ``DoubanItem`` for every movie entry found in *response*.

        Each field is filled with the first match of its XPath, evaluated
        relative to the entry's ``<li>`` node; the value is whatever
        ``extract_first()`` returns for that query.
        """
        entries = response.xpath('//*[@id="content"]/div/div[1]/ol/li')
        for entry in entries:
            item = DoubanItem()
            for field, query in self._FIELD_XPATHS:
                item[field] = entry.xpath(query).extract_first()
            # Hand the populated item over to the item pipeline.
            yield item